#!/usr/bin/env python3
"""
Research Critique System - External AI Validation
Sends research methodologies to multiple AI models for harsh critique
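
Usage (script and input file names are illustrative):
    python research_critique.py my_methodology.md --type methodology --reasoning-effort high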
"""

import os
import json
import argparse
from datetime import datetime
from typing import Dict
import openai
import google.generativeai as genai
from anthropic import Anthropic
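
# Note: all three SDKs above must be installed even when only some API keys are set,
# because the imports are unconditional; clients are created in setup_clients() only
# for providers whose key is present in the environment.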

class ResearchCritic:
    def __init__(self):
        self.setup_clients()
        self.critique_prompts = {
            'methodology': """
You are a harsh research critic. Analyze this AI research methodology and provide devastating but constructive criticism. Focus on:

1. METHODOLOGY FLAWS: What fundamental problems exist in the approach?
2. CIRCULAR REASONING: Where might the researchers be validating their own assumptions?
3. GAMEABILITY: How could this methodology be trivially gamed or fooled?
4. FALSIFIABILITY: What claims are untestable or unfalsifiable?
5. SAMPLE SIZE/DATA: Are the conclusions supported by sufficient evidence?

Be brutally honest. If this research is fundamentally flawed, say so clearly. Point out specific weaknesses that could invalidate the entire approach.

Research to critique:
{research_text}

Provide specific, actionable criticism that would improve or redirect this research.
""",

            'findings': """
You are reviewing research findings with extreme skepticism. Your job is to find holes, alternative explanations, and potential errors. Focus on:

1. ALTERNATIVE EXPLANATIONS: What else could explain these results?
2. CONFIRMATION BIAS: What evidence might have been ignored or dismissed?
3. OVERGENERALIZATION: Are conclusions broader than the data supports?
4. MEASUREMENT VALIDITY: Do the metrics actually measure what they claim?
5. REPRODUCIBILITY: Could other researchers replicate these findings?

Be the critic the researchers need, not the one they want. Point out every weakness.

Research findings to critique:
{research_text}

What are the fatal flaws in this research?
""",

            'tools': """
Evaluate this research tool with extreme skepticism. You are looking for ways this tool could be:

1. CIRCULAR: Does it measure what it claims, or just its own assumptions?
2. GAMEABLE: How could someone fake the results this tool measures?
3. CONFOUNDED: What other variables could explain the measured outcomes?
4. BIASED: What assumptions are built into the tool's design?
5. LIMITED: What important aspects does this tool completely miss?

Be harsh. If this tool is measuring the wrong thing entirely, say so.

Research tool to critique:
{research_text}

How is this tool fundamentally flawed?
"""
        }

    def setup_clients(self):
        """Initialize API clients for different AI models"""
        self.clients = {}

        # OpenAI (GPT models)
        if os.getenv('OPENAI_API_KEY'):
            self.clients['openai'] = openai.OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

        # Google (Gemini)
        if os.getenv('GOOGLE_API_KEY'):
            genai.configure(api_key=os.getenv('GOOGLE_API_KEY'))
            self.clients['gemini'] = genai.GenerativeModel('gemini-pro')

        # Anthropic (Claude)
        if os.getenv('ANTHROPIC_API_KEY'):
            self.clients['anthropic'] = Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))

        print(f"🔧 Initialized {len(self.clients)} AI critique models")

    def critique_with_gpt5(self, prompt: str, research_text: str, reasoning_effort: str = "medium") -> str:
        """Get critique from GPT-5 with configurable reasoning effort"""
        if 'openai' not in self.clients:
            return "OpenAI API key not available"

        try:
            # GPT-5 accepts a reasoning_effort parameter; temperature must stay at its default (1.0), so it is not passed
            response = self.clients['openai'].chat.completions.create(
                model="gpt-5",
                messages=[
                    {"role": "user", "content": prompt.format(research_text=research_text)}
                ],
                reasoning_effort=reasoning_effort  # "low", "medium", "high"
            )
            return response.choices[0].message.content
        except Exception as e:
            # Fallback to GPT-4o if GPT-5 not available
            try:
                response = self.clients['openai'].chat.completions.create(
                    model="gpt-4o",
                    messages=[
                        {"role": "system", "content": "You are a harsh research critic focused on finding fundamental flaws."},
                        {"role": "user", "content": prompt.format(research_text=research_text)}
                    ],
                    temperature=0.7,
                    max_tokens=1500
                )
                return f"[GPT-4o Fallback] {response.choices[0].message.content}"
            except Exception as e2:
                return f"GPT critique failed: {str(e)} (Fallback also failed: {str(e2)})"

    def critique_with_gemini(self, prompt: str, research_text: str) -> str:
        """Get critique from Google Gemini"""
        if 'gemini' not in self.clients:
            return "Google API key not available"

        try:
            response = self.clients['gemini'].generate_content(
                prompt.format(research_text=research_text)
            )
            return response.text
        except Exception as e:
            return f"Gemini critique failed: {str(e)}"

    def critique_with_claude(self, prompt: str, research_text: str) -> str:
        """Get critique from Claude (different instance for external perspective)"""
        if 'anthropic' not in self.clients:
            return "Anthropic API key not available"

        try:
            response = self.clients['anthropic'].messages.create(
                model="claude-3-sonnet-20240229",
                max_tokens=1500,
                messages=[
                    {"role": "user", "content": prompt.format(research_text=research_text)}
                ]
            )
            return response.content[0].text
        except Exception as e:
            return f"Claude critique failed: {str(e)}"

    def get_multi_model_critique(self, research_text: str, critique_type: str = 'methodology', reasoning_effort: str = "medium") -> Dict[str, str]:
        """Get critiques from multiple AI models"""
        if critique_type not in self.critique_prompts:
            raise ValueError(f"Unknown critique type: {critique_type}")

        prompt = self.critique_prompts[critique_type]
        critiques = {}

        print(f"🔍 Getting {critique_type} critiques from multiple models...")
        print(f"🧠 Using reasoning effort: {reasoning_effort}")

        # Get critique from each available model
        if 'openai' in self.clients:
            print("  📝 Requesting GPT-5 critique...")
            critiques['GPT-5'] = self.critique_with_gpt5(prompt, research_text, reasoning_effort)

        if 'gemini' in self.clients:
            print("  📝 Requesting Gemini critique...")
            critiques['Gemini'] = self.critique_with_gemini(prompt, research_text)

        if 'anthropic' in self.clients:
            print("  📝 Requesting Claude critique...")
            critiques['Claude'] = self.critique_with_claude(prompt, research_text)

        return critiques

    def analyze_research_file(self, filepath: str, critique_type: str = 'methodology', reasoning_effort: str = "medium"):
        """Analyze a research file and get external critiques"""
        print(f"🔬 Research Critique Analysis: {filepath}")
        print("=" * 60)

        # Read research content
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                research_content = f.read()
        except Exception as e:
            print(f"Error reading file: {e}")
            return

        # Get critiques from multiple models
        critiques = self.get_multi_model_critique(research_content, critique_type, reasoning_effort)

        # Display results
        print(f"\n📊 EXTERNAL CRITIQUE RESULTS ({critique_type.upper()})")
        print("=" * 60)

        for model, critique in critiques.items():
            print(f"\n🤖 {model} CRITIQUE:")
            print("-" * 40)
            print(critique)
            print()

        # Save results
        output_file = f"critique_{critique_type}_{reasoning_effort}_{os.path.basename(filepath)}.json"
        output_path = os.path.join(os.path.dirname(filepath), output_file)

        result = {
            'source_file': filepath,
            'critique_type': critique_type,
            'reasoning_effort': reasoning_effort,
            'timestamp': datetime.now().astimezone().isoformat(),
            'critiques': critiques
        }

        with open(output_path, 'w') as f:
            json.dump(result, f, indent=2)

        print(f"💾 Critique results saved to: {output_path}")

        # Generate summary
        self.generate_critique_summary(critiques)

    def generate_critique_summary(self, critiques: Dict[str, str]):
        """Generate a summary of common criticisms across models"""
        print(f"\n🎯 CRITIQUE CONSENSUS ANALYSIS")
        print("=" * 40)

        # Look for common themes in critiques
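        # Plain substring stems: 'gameab' catches 'gameable'/'gameability', and
        # 'invalid' also matches words like 'invalidate'.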
        common_terms = ['circular', 'bias', 'gameab', 'flaw', 'invalid', 'unfalsifiable']

        consensus_issues = []
        for term in common_terms:
            models_mentioning = [model for model, critique in critiques.items()
                               if term.lower() in critique.lower()]
            if len(models_mentioning) > 1:
                consensus_issues.append(f"'{term}' mentioned by: {', '.join(models_mentioning)}")

        if consensus_issues:
            print("🚨 CONSENSUS CONCERNS:")
            for issue in consensus_issues:
                print(f"  • {issue}")
        else:
            print("✅ No major consensus concerns identified")

        print(f"\n💡 RECOMMENDATION:")
        if len(consensus_issues) > 2:
            print("  🛑 MAJOR ISSUES DETECTED - Consider fundamental methodology revision")
        elif len(consensus_issues) > 0:
            print("  ⚠️  MODERATE CONCERNS - Address specific issues before proceeding")
        else:
            print("  ✅ PROCEED WITH CAUTION - Continue research with ongoing validation")

def main():
    parser = argparse.ArgumentParser(description='Get external AI critique of research')
    parser.add_argument('file', help='Research file to critique')
    parser.add_argument('--type', choices=['methodology', 'findings', 'tools'],
                       default='methodology', help='Type of critique to perform')
    parser.add_argument('--reasoning-effort', choices=['low', 'medium', 'high'],
                       default='medium', help='GPT-5 reasoning effort level (medium recommended)')
    parser.add_argument('--models', help='Comma-separated list of models to use (not implemented yet)')

    args = parser.parse_args()

    critic = ResearchCritic()
    critic.analyze_research_file(args.file, args.type, args.reasoning_effort)

if __name__ == "__main__":
    main()