import os
import time

import requests

# --- Configuration ---
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
MODEL_ID = os.getenv("HUGGINGFACE_MODEL", "deepseek/deepseek-v3-0324")
HF_API_URL = "https://router.huggingface.co/novita/v3/openai/chat/completions"

# Fail fast at import time: without a key every request would fail anyway.
if not HUGGINGFACE_API_KEY:
    raise ValueError("Error: La variable de entorno HUGGINGFACE_API_KEY no está definida.")

# --- Throttling ---
LAST_CALL_TIME = 0.0        # timestamp of the last accepted query (module-wide state)
MIN_INTERVAL_SECONDS = 5.0  # minimum gap between consecutive queries

# --- Response chunking ---
MAX_CHUNK_LENGTH = 200      # maximum characters per output fragment
DELAY_BETWEEN_CHUNKS = 1.0  # pause (seconds) after collecting each fragment


def query_huggingface(prompt):
    """Send a chat-style prompt to the HuggingFace Router and return the reply.

    Returns the model's text on success, or a human-readable (Spanish) error
    string on timeout, connection/HTTP failure, or an unexpected response
    shape — callers never see an exception raised from here.
    """
    headers = {
        "Authorization": f"Bearer {HUGGINGFACE_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": MODEL_ID,
        "messages": [
            {
                "role": "system",
                "content": "You are a helpful assistant that always responds in neutral Latin American Spanish. Ignore usernames or nicknames like 'teraflops'. Focus only on the user's input."
            },
            {
                "role": "user",
                "content": prompt
            }
        ],
        "temperature": 0.7,
        "max_tokens": 300,
    }
    try:
        response = requests.post(HF_API_URL, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        # FIX: parsing is now inside the try — a malformed or empty JSON body
        # previously escaped as an unhandled KeyError/IndexError/ValueError.
        return response.json()["choices"][0]["message"]["content"].strip()
    except requests.exceptions.Timeout:
        return "⏱️ El servidor de HuggingFace tardó demasiado en responder."
    except requests.exceptions.RequestException as e:
        return f"❌ Error al conectar con HuggingFace Router: {e}"
    except (KeyError, IndexError, TypeError, ValueError) as e:
        return f"❌ Respuesta inesperada de HuggingFace Router: {e}"


def chunk_text(text, max_length):
    """Split *text* into consecutive fragments of at most *max_length* chars."""
    return [text[i:i + max_length] for i in range(0, len(text), max_length)]


def run(*args):
    """Query the model and return the chunked reply, enforcing throttling.

    With no arguments, returns the help text instead of querying.
    """
    global LAST_CALL_TIME
    if not args:
        return help()

    # Throttle: reject calls made less than MIN_INTERVAL_SECONDS apart.
    now = time.time()
    elapsed = now - LAST_CALL_TIME
    if elapsed < MIN_INTERVAL_SECONDS:
        wait_time = round(MIN_INTERVAL_SECONDS - elapsed, 1)
        return f"¡Espera {wait_time} seg antes de hacer otra consulta!"
    LAST_CALL_TIME = now

    prompt = " ".join(args)
    try:
        full_text = query_huggingface(prompt)
    except Exception as e:
        # query_huggingface traps its own known failures; this is a
        # last-resort net so the command never crashes the caller.
        return f"Error al consultar HuggingFace: {e}"

    result = []
    for fragment in chunk_text(full_text, MAX_CHUNK_LENGTH):
        result.append(fragment)
        # NOTE(review): the whole reply is returned as ONE joined string, so
        # this per-fragment pause only delays the return — confirm the caller
        # doesn't actually need it (kept to preserve behavior).
        time.sleep(DELAY_BETWEEN_CHUNKS)
    return "\n".join(result)


def help():
    # NOTE: `help` shadows the builtin, but the name is presumably the plugin
    # framework's expected entry point, so it is kept as-is.
    # NOTE(review): the header says '.ollama' while the examples use
    # '!ollama' — confirm which command prefix the bot actually uses.
    return f""".ollama - Consulta el modelo '{MODEL_ID}' en HuggingFace usando el router. Ejemplos: !ollama ¿Qué es la entropía en física? !ollama Resume el argumento de Don Quijote en 3 líneas. """