app.py 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. import psycopg2
  2. from selenium import webdriver
  3. from selenium.webdriver.chrome.service import Service
  4. from selenium.webdriver.common.by import By
  5. from selenium.webdriver.common.keys import Keys
  6. from selenium.webdriver.chrome.options import Options
  7. from selenium.common.exceptions import NoSuchElementException
  8. from selenium.webdriver.support.ui import WebDriverWait
  9. from selenium.webdriver.support import expected_conditions as EC
  10. import os
  11. from flask import Flask, request, jsonify
  12. from waitress import serve
  13. import pandas as pd
  14. from io import StringIO
  15. # Set options for the Chromium browser
  16. chrome_options = Options()
  17. chrome_options.add_argument("--headless") # Optional: Run Chromium in headless mode
  18. chrome_options.add_argument("--no-sandbox")
  19. chrome_options.add_argument("--disable-dev-shm-usage")
  20. # Specify the path to the Chromium driver
  21. service = Service('/usr/bin/chromedriver')
  22. driver = None
  23. def extract(username: str, password: str):
  24. url_credentials = f'https://{username}:{password}@sgu.ulsa.edu.mx/psulsa/alumnos/consultainformacionalumnos/consultainformacion.aspx'
  25. url = 'https://sgu.ulsa.edu.mx/psulsa/alumnos/consultainformacionalumnos/consultainformacion.aspx'
  26. username_integer = int(username[2:])
  27. def insert_alumno_extraccion(datos_html: str, materias_html: str, historial_html: str = 'error', materias_actuales_html: str = 'error'):
  28. try:
  29. conn = psycopg2.connect(
  30. dbname=os.getenv("DBNAME"),
  31. user=os.getenv("DBUSER"),
  32. password=os.getenv("DBPASSWORD"),
  33. host=os.getenv("DBHOST"),
  34. port=os.getenv("DBPORT")
  35. )
  36. cur = conn.cursor()
  37. insert_query = """
  38. INSERT INTO public.alumno_extraccion ("Usuario_claveULSA", datos_html, materias_html, historial_html, materias_actuales_html) VALUES (%s, TRIM(%s), TRIM(%s), TRIM(%s)::JSONB, TRIM(%s))
  39. ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET datos_html = EXCLUDED.datos_html, materias_html = EXCLUDED.materias_html, error_message = NULL, registrado = DEFAULT;
  40. """
  41. cur.execute(insert_query, (username_integer, datos_html, materias_html, historial_html, materias_actuales_html))
  42. conn.commit()
  43. return cur.query.decode('utf-8')
  44. except psycopg2.ProgrammingError as e:
  45. print(f"Error de sintaxis: {e}")
  46. except psycopg2.IntegrityError as e:
  47. print(f"Error de integridad: {e}")
  48. except Exception as e:
  49. print(f"Error: {e}")
  50. finally:
  51. cur.close()
  52. conn.close()
  53. def update_alumno_extraccion_error(error: str):
  54. try:
  55. conn = psycopg2.connect(
  56. dbname=os.getenv("DBNAME"),
  57. user=os.getenv("DBUSER"),
  58. password=os.getenv("DBPASSWORD"),
  59. host=os.getenv("DBHOST"),
  60. port=os.getenv("DBPORT")
  61. )
  62. cur = conn.cursor()
  63. update_query = """
  64. INSERT INTO public.alumno_extraccion ("Usuario_claveULSA", error_message) VALUES (%s, %s)
  65. ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET error_message = EXCLUDED.error_message,
  66. materias_html = DEFAULT, registrado = DEFAULT;
  67. """
  68. cur.execute(update_query, (username_integer, error[:255]))
  69. conn.commit()
  70. print("Data updated successfully")
  71. except psycopg2.ProgrammingError as e:
  72. print(f"Error de sintaxis: {e}")
  73. finally:
  74. cur.close()
  75. conn.close()
  76. try:
  77. driver.get(url_credentials)
  78. driver.get(url)
  79. # si no existe el elemento, ctl00_contenedor_control
  80. datos_html = driver.find_element(By.ID, 'ctl00_contenedor_control').get_attribute('innerHTML')
  81. elemento = WebDriverWait(driver, 3.5).until(
  82. EC.presence_of_element_located((By.ID, 'ctl00_contenedor_HistorialAlumno1_lblBtnSeccionHAcademico'))
  83. )
  84. elemento.click()
  85. # Get the HTML content of the materias element
  86. materias_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_divHAcademico').get_attribute('innerHTML')
  87. historial_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_gvMaterias').get_attribute('innerHTML')
  88. # materias_actuales_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_div13').get_attribute('innerHTML')
  89. historial_html_io = StringIO(f"<table>{historial_html}</table>")
  90. # Read the HTML table into a DataFrame
  91. df = pd.read_html(historial_html_io)[0]
  92. if 'GRUPO' not in df.columns:
  93. raise KeyError("Column 'GRUPO' not found in the DataFrame")
  94. # Convert the DataFrame to JSON
  95. json_result = df[df['GRUPO'] != 'Promedio:'].to_json(orient='records')
  96. # Connect to PostgreSQL database
  97. query = insert_alumno_extraccion(datos_html, materias_html, json_result)
  98. print("Data extracted successfully")
  99. return json_result
  100. except NoSuchElementException as e:
  101. update_alumno_extraccion_error(str(e))
  102. def se_puede_extraer():
  103. try:
  104. conn = conn = psycopg2.connect(
  105. dbname=os.getenv("DBNAME"),
  106. user=os.getenv("DBUSER"),
  107. password=os.getenv("DBPASSWORD"),
  108. host=os.getenv("DBHOST"),
  109. port=os.getenv("DBPORT")
  110. )
  111. cursor = conn.cursor()
  112. # SELECCIONAR ULTIMA planeacion
  113. query = """
  114. SELECT 1
  115. FROM alumno_extraccion_fecha
  116. WHERE CURRENT_DATE BETWEEN fecha_inicio AND fecha_fin
  117. ORDER BY CREATED_AT DESC
  118. LIMIT 1;
  119. """
  120. # Ejecuta la consulta
  121. cursor.execute(query)
  122. result = cursor.fetchone()
  123. # Verifica si se obtuvo algún resultado
  124. exists = result is not None
  125. # Cierra el cursor y la conexión
  126. cursor.close()
  127. conn.close()
  128. return exists
  129. except Exception as e:
  130. print(f"Error: {e}")
  131. return False
  132. app = Flask(__name__)
  133. @app.route('/calificaciones', methods=['POST'])
  134. def main():
  135. global driver
  136. # Initialize the WebDriver
  137. driver = webdriver.Chrome(service=service, options=chrome_options)
  138. username = request.form.get('clave')
  139. password = request.form.get('password')
  140. se_puede = se_puede_extraer()
  141. if se_puede:
  142. query = extract(username, password)
  143. # Close the session
  144. driver.quit()
  145. return jsonify({"message": "Data extracted successfully", "en-fecha": se_puede})
  146. if __name__ == '__main__':
  147. serve(app, host='0.0.0.0', port=5000)